Contents

import pandas as pd

# Load the data set; the first two lines of the CSV file are comment lines and are skipped.
daten = pd.read_csv('metall_oder_stein.csv', skiprows=2)

# Print an overview of the columns, their non-null counts and dtypes.
daten.info()

# Completeness check: number of missing values per column.
fehlende_werte = daten.isna().sum()
print(fehlende_werte)

# Peek at the first ten rows of the data.
daten.head(10)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 208 entries, 0 to 207
Data columns (total 61 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Signal01  208 non-null    float64
 1   Signal02  208 non-null    float64
 2   Signal03  208 non-null    float64
 3   Signal04  208 non-null    float64
 4   Signal05  208 non-null    float64
 5   Signal06  208 non-null    float64
 6   Signal07  208 non-null    float64
 7   Signal08  208 non-null    float64
 8   Signal09  208 non-null    float64
 9   Signal10  208 non-null    float64
 10  Signal11  208 non-null    float64
 11  Signal12  208 non-null    float64
 12  Signal13  208 non-null    float64
 13  Signal14  208 non-null    float64
 14  Signal15  208 non-null    float64
 15  Signal16  208 non-null    float64
 16  Signal17  208 non-null    float64
 17  Signal18  208 non-null    float64
 18  Signal19  208 non-null    float64
 19  Signal20  208 non-null    float64
 20  Signal21  208 non-null    float64
 21  Signal22  208 non-null    float64
 22  Signal23  208 non-null    float64
 23  Signal24  208 non-null    float64
 24  Signal25  208 non-null    float64
 25  Signal26  208 non-null    float64
 26  Signal27  208 non-null    float64
 27  Signal28  208 non-null    float64
 28  Signal29  208 non-null    float64
 29  Signal30  208 non-null    float64
 30  Signal31  208 non-null    float64
 31  Signal32  208 non-null    float64
 32  Signal33  208 non-null    float64
 33  Signal34  208 non-null    float64
 34  Signal35  208 non-null    float64
 35  Signal36  208 non-null    float64
 36  Signal37  208 non-null    float64
 37  Signal38  208 non-null    float64
 38  Signal39  208 non-null    float64
 39  Signal40  208 non-null    float64
 40  Signal41  208 non-null    float64
 41  Signal42  208 non-null    float64
 42  Signal43  208 non-null    float64
 43  Signal44  208 non-null    float64
 44  Signal45  208 non-null    float64
 45  Signal46  208 non-null    float64
 46  Signal47  208 non-null    float64
 47  Signal48  208 non-null    float64
 48  Signal49  208 non-null    float64
 49  Signal50  208 non-null    float64
 50  Signal51  208 non-null    float64
 51  Signal52  208 non-null    float64
 52  Signal53  208 non-null    float64
 53  Signal54  208 non-null    float64
 54  Signal55  208 non-null    float64
 55  Signal56  208 non-null    float64
 56  Signal57  208 non-null    float64
 57  Signal58  208 non-null    float64
 58  Signal59  208 non-null    float64
 59  Signal60  208 non-null    float64
 60  Material  208 non-null    object 
dtypes: float64(60), object(1)
memory usage: 99.3+ KB
Signal01    0
Signal02    0
Signal03    0
Signal04    0
Signal05    0
           ..
Signal57    0
Signal58    0
Signal59    0
Signal60    0
Material    0
Length: 61, dtype: int64
Signal01 Signal02 Signal03 Signal04 Signal05 Signal06 Signal07 Signal08 Signal09 Signal10 ... Signal52 Signal53 Signal54 Signal55 Signal56 Signal57 Signal58 Signal59 Signal60 Material
0 0.0200 0.0371 0.0428 0.0207 0.0954 0.0986 0.1539 0.1601 0.3109 0.2111 ... 0.0027 0.0065 0.0159 0.0072 0.0167 0.0180 0.0084 0.0090 0.0032 Stein
1 0.0453 0.0523 0.0843 0.0689 0.1183 0.2583 0.2156 0.3481 0.3337 0.2872 ... 0.0084 0.0089 0.0048 0.0094 0.0191 0.0140 0.0049 0.0052 0.0044 Stein
2 0.0262 0.0582 0.1099 0.1083 0.0974 0.2280 0.2431 0.3771 0.5598 0.6194 ... 0.0232 0.0166 0.0095 0.0180 0.0244 0.0316 0.0164 0.0095 0.0078 Stein
3 0.0100 0.0171 0.0623 0.0205 0.0205 0.0368 0.1098 0.1276 0.0598 0.1264 ... 0.0121 0.0036 0.0150 0.0085 0.0073 0.0050 0.0044 0.0040 0.0117 Stein
4 0.0762 0.0666 0.0481 0.0394 0.0590 0.0649 0.1209 0.2467 0.3564 0.4459 ... 0.0031 0.0054 0.0105 0.0110 0.0015 0.0072 0.0048 0.0107 0.0094 Stein
5 0.0286 0.0453 0.0277 0.0174 0.0384 0.0990 0.1201 0.1833 0.2105 0.3039 ... 0.0045 0.0014 0.0038 0.0013 0.0089 0.0057 0.0027 0.0051 0.0062 Stein
6 0.0317 0.0956 0.1321 0.1408 0.1674 0.1710 0.0731 0.1401 0.2083 0.3513 ... 0.0201 0.0248 0.0131 0.0070 0.0138 0.0092 0.0143 0.0036 0.0103 Stein
7 0.0519 0.0548 0.0842 0.0319 0.1158 0.0922 0.1027 0.0613 0.1465 0.2838 ... 0.0081 0.0120 0.0045 0.0121 0.0097 0.0085 0.0047 0.0048 0.0053 Stein
8 0.0223 0.0375 0.0484 0.0475 0.0647 0.0591 0.0753 0.0098 0.0684 0.1487 ... 0.0145 0.0128 0.0145 0.0058 0.0049 0.0065 0.0093 0.0059 0.0022 Stein
9 0.0164 0.0173 0.0347 0.0070 0.0187 0.0671 0.1056 0.0697 0.0962 0.0251 ... 0.0090 0.0223 0.0179 0.0084 0.0068 0.0032 0.0035 0.0056 0.0040 Stein

10 rows × 61 columns

# Show which class labels occur in the target column. The call is wrapped in
# print() because a bare expression that is not the last statement of a
# notebook cell is evaluated and then silently discarded.
print(daten['Material'].unique())

# Encode the target numerically: 'Stein' -> 0, 'Metall' -> 1. The labels are
# first replaced by digit strings and then cast to integers.
daten['Material'] = daten['Material'].replace({'Stein': '0', 'Metall': '1'}).astype(int)

# Summary statistics of all numeric columns (now including 'Material').
daten.describe()
Signal01 Signal02 Signal03 Signal04 Signal05 Signal06 Signal07 Signal08 Signal09 Signal10 ... Signal52 Signal53 Signal54 Signal55 Signal56 Signal57 Signal58 Signal59 Signal60 Material
count 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 ... 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000 208.000000
mean 0.029164 0.038437 0.043832 0.053892 0.075202 0.104570 0.121747 0.134799 0.178003 0.208259 ... 0.013420 0.010709 0.010941 0.009290 0.008222 0.007820 0.007949 0.007941 0.006507 0.533654
std 0.022991 0.032960 0.038428 0.046528 0.055552 0.059105 0.061788 0.085152 0.118387 0.134416 ... 0.009634 0.007060 0.007301 0.007088 0.005736 0.005785 0.006470 0.006181 0.005031 0.500070
min 0.001500 0.000600 0.001500 0.005800 0.006700 0.010200 0.003300 0.005500 0.007500 0.011300 ... 0.000800 0.000500 0.001000 0.000600 0.000400 0.000300 0.000300 0.000100 0.000600 0.000000
25% 0.013350 0.016450 0.018950 0.024375 0.038050 0.067025 0.080900 0.080425 0.097025 0.111275 ... 0.007275 0.005075 0.005375 0.004150 0.004400 0.003700 0.003600 0.003675 0.003100 0.000000
50% 0.022800 0.030800 0.034300 0.044050 0.062500 0.092150 0.106950 0.112100 0.152250 0.182400 ... 0.011400 0.009550 0.009300 0.007500 0.006850 0.005950 0.005800 0.006400 0.005300 1.000000
75% 0.035550 0.047950 0.057950 0.064500 0.100275 0.134125 0.154000 0.169600 0.233425 0.268700 ... 0.016725 0.014900 0.014500 0.012100 0.010575 0.010425 0.010350 0.010325 0.008525 1.000000
max 0.137100 0.233900 0.305900 0.426400 0.401000 0.382300 0.372900 0.459000 0.682800 0.710600 ... 0.070900 0.039000 0.035200 0.044700 0.039400 0.035500 0.044000 0.036400 0.043900 1.000000

8 rows × 61 columns

import plotly.express as px

# Box plot of all signal columns; the target column 'Material' is left out.
signale = daten.drop(columns='Material')
fig = px.box(
    signale,
    title='Stein oder Metall',
    labels={'variable': 'Eigenschaft', 'value': 'Wert'},
)
fig.show()
# Count how many samples belong to each material class (computed once and
# reused for both the printout and the plot).
anzahl_pro_material = daten['Material'].value_counts()
print(anzahl_pro_material)

# Visualise the class distribution as a bar chart. The x-axis carries the
# material classes and the y-axis the number of samples, so the axis titles
# are set accordingly.
fig = px.bar(anzahl_pro_material, title='Stein oder Metall')
fig.update_layout(
    xaxis_title='Material',
    yaxis_title='Anzahl',
    showlegend=False,
)
fig.show()
Material
1    111
0     97
Name: count, dtype: int64
from sklearn.model_selection import train_test_split

# Encode the target column as integers and print the resulting codes as a check.
kodierung = {'Stein':'0', 'Metall':'1'}
daten['Material'] = daten['Material'].replace(kodierung).astype('int')
print(daten['Material'].unique())

# Separate the target (output) from the signal columns (input).
y = daten['Material']
X = daten.drop(columns='Material')

# 80:20 split into training and test data with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
[0 1]
from sklearn.tree import DecisionTreeClassifier

# Create the decision tree and train it on the training data. random_state is
# fixed (as for the train/test split) so that the tree, and therefore the
# reported scores, are reproducible from run to run.
modell_baum = DecisionTreeClassifier(random_state=0)
modell_baum.fit(X_train, y_train)

# Evaluation: score the fitted tree on the training and on the test data.
score_train = modell_baum.score(X_train, y_train)
score_test = modell_baum.score(X_test, y_test)
print(f'Score Trainingsdaten Entscheidungsbaum: {score_train:.2f}')
print(f'Score Testdaten Entscheidungsbaum: {score_test:.2f}')
Score Trainingsdaten Entscheidungsbaum: 1.00
Score Testdaten Entscheidungsbaum: 0.67
from sklearn.ensemble import RandomForestClassifier

# Train random forests with an increasing number of trees and compare the scores.
for n in (1, 5, 10, 20, 50, 100):
    # Build a forest with n trees (fixed seed) and fit it in one step.
    modell_rf = RandomForestClassifier(n_estimators=n, random_state=0).fit(X_train, y_train)

    # Score on training and test data.
    score_train = modell_rf.score(X_train, y_train)
    score_test = modell_rf.score(X_test, y_test)

    # Report both scores, followed by an empty line as separator.
    print(
        f'Anzahl Entscheidungsbäume: {n}',
        f'Score Training: {score_train :.2f} | Score Test: {score_test :.2f}',
        '',
        sep='\n',
    )
Anzahl Entscheidungsbäume: 1
Score Training: 0.86 | Score Test: 0.64

Anzahl Entscheidungsbäume: 5
Score Training: 0.97 | Score Test: 0.74

Anzahl Entscheidungsbäume: 10
Score Training: 0.99 | Score Test: 0.86

Anzahl Entscheidungsbäume: 20
Score Training: 1.00 | Score Test: 0.81

Anzahl Entscheidungsbäume: 50
Score Training: 1.00 | Score Test: 0.88

Anzahl Entscheidungsbäume: 100
Score Training: 1.00 | Score Test: 0.88
from sklearn.svm  import SVC

# Linear SVM: create the model and fit it on the training data in one step.
svm_linear = SVC(kernel='linear').fit(X_train, y_train)

# Evaluate the linear SVM on training and test data.
score_train = svm_linear.score(X_train, y_train)
score_test = svm_linear.score(X_test, y_test)
print(
    f'Score Trainingsdaten lineare SVM: {score_train :.2f}',
    f'Score Testdaten lineare SVM: {score_test :.2f}',
    sep='\n',
)

# Create a non-linear SVM (RBF kernel) and train it on the training data.
svm_rbf = SVC(kernel='rbf')
svm_rbf.fit(X_train, y_train)

# Evaluate the non-linear SVM on training and test data. The printed labels
# name the non-linear model so that its scores cannot be confused with those
# of the linear SVM.
score_train = svm_rbf.score(X_train, y_train)
score_test = svm_rbf.score(X_test, y_test)
print(f'Score Trainingsdaten nichtlineare SVM: {score_train:.2f}')
print(f'Score Testdaten nichtlineare SVM: {score_test:.2f}')
Score Trainingsdaten lineare SVM: 0.86
Score Testdaten lineare SVM: 0.86
Score Trainingsdaten lineare SVM: 0.88
Score Testdaten lineare SVM: 0.79